package com.health.util;

import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Small command-line helper that extracts the plain text of a PDF file with
 * Apache PDFBox and prints it to standard output.
 *
 * <p>Reference: https://iowiki.com/pdfbox/pdfbox_quick_guide.html
 *
 * @author konglz
 * @since 2023/4/14 13:14
 */
public class PDFUtil {

    /** PDF that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "C:\\Users\\18225\\Desktop\\test.pdf";

    /**
     * Extracts the text of a PDF and prints it to standard output.
     *
     * <p>If the file cannot be read or parsed, the failure is reported on
     * standard error and an empty line is printed instead of the text.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read.
     *             When absent, {@link #DEFAULT_PDF_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        String text = "";
        try {
            text = extractText(new File(path));
        } catch (IOException e) {
            // Covers FileNotFoundException as well; say which file failed.
            System.err.println("Failed to extract text from " + path);
            e.printStackTrace();
        }
        System.out.println(text);
    }

    /**
     * Parses the given PDF file and returns its text content.
     *
     * @param file the PDF file to read
     * @return the text produced by {@link PDFTextStripper#getText(PDDocument)}
     * @throws IOException if the file cannot be opened, parsed, or closed
     */
    private static String extractText(File file) throws IOException {
        // Resources are closed in reverse order of declaration. Closing the
        // document may already release the underlying source; closing the
        // source again afterwards is expected to be a harmless no-op, and it
        // guarantees cleanup when parsing fails before a document exists.
        try (FileInputStream in = new FileInputStream(file);
             RandomAccessRead source = new RandomAccessBufferedFileInputStream(in)) {
            PDFParser parser = new PDFParser(source);
            parser.parse();
            try (PDDocument document = parser.getPDDocument()) {
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(document);
            }
        }
    }

}
